mo-456 committed on
Commit
d284cb4
·
verified ·
1 Parent(s): 43c5362

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -20
app.py CHANGED
@@ -1,47 +1,43 @@
1
  from langchain_community.document_loaders import TextLoader
2
  from langchain.text_splitter import CharacterTextSplitter
3
  from langchain_community.embeddings import HuggingFaceEmbeddings
4
- from langchain_community.vectorstores import FAISS
 
5
  from langchain.chains import RetrievalQA
6
- from langchain.llms import HuggingFaceHub
7
  import gradio as gr
8
 
9
- # 1. Load Arabic knowledge base (plain text)
10
  loader = TextLoader("knowledge.txt", encoding="utf-8")
11
  docs = loader.load()
12
 
13
- # 2. Split text into smaller chunks
14
  text_splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
15
  documents = text_splitter.split_documents(docs)
16
 
17
- # 3. Create embeddings using multilingual model
18
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
19
 
20
- # 4. Create FAISS vector store
21
- db = FAISS.from_documents(documents, embeddings)
22
  retriever = db.as_retriever()
23
 
24
- # 5. Load lightweight LLM from Hugging Face
25
  llm = HuggingFaceHub(
26
  repo_id="tiiuae/falcon-7b-instruct",
27
  model_kwargs={"temperature": 0.3, "max_new_tokens": 200}
28
  )
29
 
30
- # 6. Setup QA chain
31
  qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
32
 
33
- # 7. Define Gradio interface
34
  def answer_question(question):
35
- response = qa_chain.run(question)
36
- return response.strip()
37
 
38
- interface = gr.Interface(
39
  fn=answer_question,
40
- inputs=gr.Textbox(label="اكتب سؤالك هنا", lines=2, max_lines=2, max_chars=300),
41
  outputs=gr.Textbox(label="الإجابة"),
42
- title="شات بوت معرفي",
43
- description="أدخل سؤالك باللغة العربية للحصول على إجابة من قاعدة المعرفة."
44
- )
45
-
46
- # 8. Launch app (Hugging Face compatibility)
47
- interface.launch(share=True)
 
1
  from langchain_community.document_loaders import TextLoader
2
  from langchain.text_splitter import CharacterTextSplitter
3
  from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import Chroma
5
+ from langchain_community.llms import HuggingFaceHub
6
  from langchain.chains import RetrievalQA
 
7
  import gradio as gr
8
 
9
+ # 1. Load plain text file (Arabic)
10
  loader = TextLoader("knowledge.txt", encoding="utf-8")
11
  docs = loader.load()
12
 
13
+ # 2. Split into small chunks
14
  text_splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
15
  documents = text_splitter.split_documents(docs)
16
 
17
+ # 3. Create multilingual embeddings
18
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
19
 
20
+ # 4. Use Chroma vector store (instead of FAISS)
21
+ db = Chroma.from_documents(documents, embeddings)
22
  retriever = db.as_retriever()
23
 
24
+ # 5. Load LLM from Hugging Face (free)
25
  llm = HuggingFaceHub(
26
  repo_id="tiiuae/falcon-7b-instruct",
27
  model_kwargs={"temperature": 0.3, "max_new_tokens": 200}
28
  )
29
 
30
+ # 6. Create QA chain
31
  qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
32
 
33
+ # 7. Gradio interface
34
  def answer_question(question):
35
+ return qa_chain.run(question)
 
36
 
37
+ gr.Interface(
38
  fn=answer_question,
39
+ inputs=gr.Textbox(label="اكتب سؤالك هنا", lines=2),
40
  outputs=gr.Textbox(label="الإجابة"),
41
+ title="شات بوت معرفي عربي",
42
+ description="أدخل سؤالك حول محتوى ملف المعرفة بالعربية.",
43
+ ).launch(share=True)