By Xavier Collantes
Created: 9/15/2024; Updated: 7/25/2025
Install the dependencies:

```bash
pip install langchain langchain-google-genai langchain-chroma langchain-text-splitters langchain-community langgraph beautifulsoup4
```
```python
from langchain.chat_models import init_chat_model
from langchain_core.language_models.chat_models import BaseChatModel

MODEL_NAME: str = "gemini-2.0-flash"
MODEL_PROVIDER: str = "google_genai"

# Full list: https://python.langchain.com/docs/integrations/chat/
llm: BaseChatModel = init_chat_model(MODEL_NAME, model_provider=MODEL_PROVIDER)
```
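A quick sanity check, assuming `GOOGLE_API_KEY` is already set in your environment: invoke the model once and print the reply.

```python
# One round-trip to the model to confirm credentials and connectivity.
reply = llm.invoke("Say hello in five words or fewer.")
print(reply.content)
```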
```python
import os

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Full list: https://python.langchain.com/docs/integrations/text_embedding/
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
    # Set GOOGLE_API_KEY in your environment or assign the key directly.
    api_key=os.getenv("GOOGLE_API_KEY"),
)
```
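The embedding dimension matters once documents are stored, so it helps to check it up front. A minimal sketch:

```python
# Embed a test string and inspect the vector's dimension.
vector: list[float] = embeddings.embed_query("test sentence")
print(f"Embedding dimension: {len(vector)}")  # 3072 for gemini-embedding-001.
```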
```python
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Keeps a local SQLite file.
)
```
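Because `persist_directory` is set, the collection survives restarts. One way to peek at what is already stored, using langchain-chroma's `get()`:

```python
# Inspect the persisted collection's current contents.
stored: dict = vector_store.get()
print(f"Documents in collection: {len(stored['ids'])}")
```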
```python
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document

# Some URLs will be blocked by my "Prove You're Human" bot-prevention.
loader: WebBaseLoader = WebBaseLoader(
    web_paths=(
        "https://xaviercollantes.dev/articles/bulldog-band",
        "https://xaviercollantes.dev/articles/faxion-ai",
        "https://xaviercollantes.dev/articles/measuring-tokens",
        "https://xaviercollantes.dev/articles/rpi-camera",
    ),
)
docs: list[Document] = loader.load()
```
Each URL becomes one `Document`:

```text
[Document(metadata={'source': 'https://xaviercollantes.dev/articles/bulldog-band', ...'),
 Document(metadata={'source': 'https://xaviercollantes.dev/articles/faxion-ai', ...),
 Document(metadata={'source': 'https://xaviercollantes.dev/articles/measuring-tokens', ...),
 Document(metadata={'source': 'https://xaviercollantes.dev/articles/rpi-camera', ...)]
```
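`WebBaseLoader` passes `bs_kwargs` straight to BeautifulSoup, so page navigation and boilerplate can be stripped at load time. A sketch, where `"article-body"` is a hypothetical class name; inspect the page's HTML for the real one:

```python
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Parse only the main article content instead of the whole page.
# "article-body" is a placeholder class name for illustration.
filtered_loader = WebBaseLoader(
    web_paths=("https://xaviercollantes.dev/articles/bulldog-band",),
    bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_="article-body")),
)
filtered_docs = filtered_loader.load()
```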
Splitting may still be needed if your input is too long for the model to handle in one pass.
```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
docs: list[Document] = text_splitter.split_documents(docs)
print(f"Divided the documents into {len(docs)} chunks.")
```
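To sanity-check the split, peek at a chunk and its size; with `chunk_overlap=50`, consecutive chunks share up to 50 characters:

```python
# Inspect the first chunk: content length plus the source metadata it keeps.
first_chunk: Document = docs[0]
print(f"Chunk length: {len(first_chunk.page_content)} chars")
print(f"Source: {first_chunk.metadata.get('source')}")
print(first_chunk.page_content[:100])
```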
```python
doc_ids: list[str] = vector_store.add_documents(documents=docs)
print(f"Document IDs: {len(doc_ids)}: {doc_ids}")
```
```text
[Document(id='4d2d84a1-d93b-4342-90d6-812047d56882', metadata={'language': 'en-US', 'source': 'https://xaviercollantes.dev/articles/bulldog-band', 'title': 'Bulldog Band -
 Document(id='d9bc138c-7330-47aa-8e64-42cdfda26799', metadata={'description': 'Tokens mean $$$ and how to measure them.', 'title': 'Measuring Tokens in LLMs - Xavier Collant
 Document(id='7f72ab32-c2bb-424e-9aac-0f821ae222aa', metadata={'description': 'Architecting and leading the development of a groundbreaking AI fashion platform that reduce
 Document(id='69d83587-f15a-4e27-a9fe-88d4ab0ca553', metadata={'title': 'FastAPI: Build your own APIs - Xavier Collantes', 'source': 'https://xaviercol
```
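Re-running the notebook inserts the same chunks again under fresh UUIDs. One way to make the step idempotent, assuming a stable chunk ordering, is to pass deterministic ids so langchain-chroma upserts instead of duplicating:

```python
# Deterministic ids: re-adding the same chunk upserts rather than duplicates.
stable_ids: list[str] = [
    f"{doc.metadata.get('source', 'doc')}-{i}" for i, doc in enumerate(docs)
]
vector_store.add_documents(documents=docs, ids=stable_ids)
```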
If the collection was created with a different embedding model, the stored dimension no longer matches and Chroma rejects the new vectors:

```text
InvalidArgumentError: Collection expecting embedding with dimension of 1024, got 3072
```
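The usual fix is to rebuild the collection under the new embedding model, either by deleting the persisted collection or by switching to a fresh `collection_name`. A sketch:

```python
# Drop the stale 1024-dimension collection, then recreate and re-add documents.
vector_store.delete_collection()
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)
vector_store.add_documents(documents=docs)
```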
Pre-made prompts can be pulled from the LangChain Hub, and LangChain also has a built-in prompt template for RAG. Here, we define our own:

```python
from langchain_core.prompts import ChatPromptTemplate

# Create your own RAG prompt template.
custom_prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that can answer questions about "
            "Xavier's blogs.\n\nContext:\n{context}",
        ),
        (
            "human",
            "{question}",  # A template variable, not Python string interpolation.
        ),
    ]
)
```
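To see exactly what the model will receive, render the template with sample values; `invoke` returns a prompt value that converts to chat messages:

```python
# Render the template and inspect the final messages.
prompt_value = custom_prompt.invoke(
    {"context": "Example context here.", "question": "Example question?"}
)
for message in prompt_value.to_messages():
    print(f"{message.type}: {message.content}")
```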
```python
from pydantic import BaseModel, Field


class State(BaseModel):
    """State for the application."""

    question: str = Field(default="", description="The user's input text.")
    context: list[Document] = Field(
        default_factory=list,
        description="The documents retrieved from the vector store.",
    )
    answer: str = Field(default="", description="The LLM's answer to the question.")


def retrieve_context(state: State) -> dict:
    """Retrieves the most relevant documents from the vector store."""

    # List of documents most relevant to the question.
    retrieved_docs: list[Document] = vector_store.similarity_search(state.question)
    # "context" is the key for the returned value and matches the field name
    # in the State object.
    return {"context": retrieved_docs}


def generate(state: State, prompt: ChatPromptTemplate, llm: BaseChatModel) -> dict:
    """Performs the actual query to the LLM."""

    docs_content: str = "\n\n".join(doc.page_content for doc in state.context)
    messages = prompt.invoke({"question": state.question, "context": docs_content})
    response = llm.invoke(messages)
    # "answer" is the key for the returned value and matches the field name
    # in the State object.
    return {"answer": response.content}
```
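While debugging retrieval, it helps to see relevance scores and to control how many chunks come back. A sketch using Chroma's scored search:

```python
# Top 2 chunks with their distance scores (lower distance = more similar).
for doc, score in vector_store.similarity_search_with_score(
    "Where did Bulldog Band travel to?", k=2
):
    print(f"{score:.4f}  {doc.metadata.get('source')}")
```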
```python
### PLACE YOUR QUESTION HERE ###
input_chat: str = "Where did Bulldog Band travel to?"
```
```python
state: State = State(question=input_chat)

# Get relevant context using the helper function.
context_result: dict = retrieve_context(state)
state.context = context_result["context"]

# Generate the answer using the helper function.
answer_result: dict = generate(state, custom_prompt, llm)
state.answer = answer_result["answer"]

# Wrap the answer to roughly 10 words per line for readability.
answer_words: list[str] = state.answer.split(" ")
output_lines: str = ""
line_len: int = 10
curr_words: int = line_len
for word in answer_words:
    curr_words -= 1
    output_lines += word + " "
    if curr_words == 0:
        output_lines += "\n"
        curr_words = line_len

# This is the final answer.
print(output_lines)
```
```text
The Bulldog Band traveled to a handful of cities across the United States, including:
* Las Vegas
* San Jose
* Chicago
* Phoenix
```
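The standard library can do the same wrapping with less ceremony; an equivalent using `textwrap`, wrapping by character width rather than word count:

```python
import textwrap

# Wrap to roughly 80 characters per line instead of counting words.
print(textwrap.fill(state.answer, width=80))
```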
```python
from langgraph.graph import START, StateGraph

# Chain everything together. Since generate() takes extra arguments beyond
# the state, bind them with a lambda when registering the node.
graph_builder = StateGraph(State).add_sequence(
    [
        ("retrieve_context", retrieve_context),
        ("generate", lambda state: generate(state, custom_prompt, llm)),
    ]
)
graph_builder.add_edge(START, "retrieve_context")
graph = graph_builder.compile()

# One-liner execution.
question = "what are the components?"
result = graph.invoke({"question": question})
print(result["answer"])
```
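For longer chains, you can watch each node finish; compiled LangGraph graphs also support streaming state updates step by step:

```python
# Stream the state update emitted as each node in the graph completes.
for step in graph.stream({"question": question}, stream_mode="updates"):
    print(step)
```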